*Setup: replace the path below with the directory that contains the input .dta files.
*The relative file names used in this script are resolved against this directory.
*Create a subfolder called "Output" there as well.
cd "Drive:\Folder"

**********************************************************************************
*GPA, lower secondary
{
use "grskkar.dta", clear
*Lower secondary school graduation year (avgaar). The variable is only renamed here,
*no destring is applied, so it has to be numeric already for the collapse below.
rename avgaar Low2ndSchGradYear
*Numeric copy of the grade (stp); force turns non-numeric entries into missing
destring stp, gen(GPA) force
*Science grades: course code (fagkode) contains MAT, NAT or REA; all other grades are non-science
gen SciGPA = GPA if regexm(fagkode,"MAT") | regexm(fagkode,"NAT") | regexm(fagkode,"REA")
gen NonSciGPA = GPA if !(regexm(fagkode,"MAT") | regexm(fagkode,"NAT") | regexm(fagkode,"REA"))
keep lopenr GPA SciGPA NonSciGPA Low2ndSchGradYear
*One row per person (lopenr): average over that person's grade records
collapse (mean) GPA SciGPA Low2ndSchGradYear NonSciGPA, by(lopenr)
*Labels are attached after the collapse: collapse replaces variable labels with
*"(mean) varname", so a label set earlier would not survive into the saved file.
label var Low2ndSchGradYear "Lower secondary school graduation year"
label var GPA "GPA (0 to 6); lower secondary school"
label var SciGPA "Science GPA (0 to 6); lower secondary school"
label var NonSciGPA "Non-Science GPA (0 to 6); lower secondary school"
*Save
save EdOutcomes.dta, replace
}
**********************************************************************************
*GPA, upper secondary (High school)
{
use "vitnemalfag.dta", clear
*Numeric copy of the grade (standkar); force sets non-numeric entries to missing
destring standkar, generate(GPA_HS) force
*Indicator for courses whose code (fagkode) contains MAT, NAT or REA
tempvar sci_course
generate byte `sci_course' = regexm(fagkode,"MAT") | regexm(fagkode,"NAT") | regexm(fagkode,"REA")
generate SciGPA_HS    = GPA_HS if `sci_course'
generate NonSciGPA_HS = GPA_HS if !`sci_course'
*Average per person; collapse retains only the listed variables and lopenr
collapse (mean) GPA_HS SciGPA_HS NonSciGPA_HS, by(lopenr)
label variable GPA_HS       "GPA (0 to 6); upper secondary school"
label variable SciGPA_HS    "Science GPA (0 to 6); upper secondary school"
label variable NonSciGPA_HS "Non-Science GPA (0 to 6); upper secondary school"
*Attach lower secondary GPA and graduation year; only persons found in both files are kept
merge 1:1 lopenr using EdOutcomes.dta, keep(match) nogenerate
*Save
save EdOutcomes.dta, replace
}
**********************************************************************************
*High school academic tracks
{
use "vitnemal.dta", clear
*Sample selection: keep records where primaer equals "J"
keep if primaer == "J"
*Split the diploma date string into numeric year (chars 1-4), month (5-6) and day (7-8)
rename utdato diploma_date
gen year_diploma  = substr(diploma_date,1,4)
gen month_diploma = substr(diploma_date,5,2)
gen day_diploma   = substr(diploma_date,7,2)
destring year_diploma month_diploma day_diploma, force replace
*Keep each person's earliest diploma date, narrowing by year, then month, then day.
*Each keep tests only the newest component, because rows failing an earlier
*component have already been removed by the preceding keep.
bysort lopenr: egen first_deg_year  = min(year_diploma)
keep if year_diploma == first_deg_year
bysort lopenr: egen first_deg_month = min(month_diploma)
keep if month_diploma == first_deg_month
bysort lopenr: egen first_deg_day   = min(day_diploma)
keep if day_diploma == first_deg_day
*Merging lower/upper secondary GPA and graduation year; matched persons only.
*NOTE(review): if a person has several records on the earliest date, lopenr is not
*unique at this point and the 1:1 merge stops with an error.
merge 1:1 lopenr using EdOutcomes.dta
keep if _merge==3
drop _merge
*STEM HS diploma concentration (NUS2000_3 equal to "401111"), defined only when the
*lower secondary graduation year is 2005 or later and NUS2000_3 is non-empty.
*NOTE(review): the original remark reads "2006 on due to reform; avgang grad year up to 2005"
*while the condition uses >=2005 - confirm the intended cutoff.
gen STEM_HSdiploma = (NUS2000_3 == "401111") if Low2ndSchGradYear!=. & Low2ndSchGradYear>=2005 & NUS2000_3!=""
label var STEM_HSdiploma "HS diploma STEM specialization"
*Academic track indicators from the track codes; missing when the code is empty
*Year one (STUDRETN_1)
gen Acad_track_Yr1 = inlist(STUDRETN_1, "21", "22", "23", "60", "61", "62", "63", "64") if STUDRETN_1!=""
label var Acad_track_Yr1 "Academic track; 1st year HS"
*HS diploma (STUDRETN_3)
gen Acad_track_Yr3 = inlist(STUDRETN_3, "21", "22", "23", "60", "61", "62", "63", "64") if STUDRETN_3!=""
label var Acad_track_Yr3 "HS diploma academic track"
*Non-academic track, HS diploma: complement of the academic indicator (missing stays missing)
gen NonAcad_track_Yr3 = 1 - Acad_track_Yr3
label var NonAcad_track_Yr3 "HS diploma non-academic track"
*Non-STEM academic track, HS diploma:
*  0 when the track is non-academic or the diploma is STEM,
*  1 when the track is academic and the diploma is not STEM,
*  missing otherwise
gen NonSTEM_Acad_track_Yr3 = cond(Acad_track_Yr3==0 | STEM_HSdiploma==1, 0, cond(Acad_track_Yr3==1 & STEM_HSdiploma==0, 1, .))
label var NonSTEM_Acad_track_Yr3 "Non-Stem HS diploma academic track"
**Save
save EdOutcomes.dta, replace
}
**********************************************************************************
*College information
{
use "BU92_14.dta", clear
*First and last year in the data, stored as globals
sum aar
global minyr = r(min)
global maxyr = r(max)
*Years actually present in aar: reshape wide creates igang<year> only for these,
*so the loop below iterates over them rather than over every integer from min to max
quietly levelsof aar, local(years)
*Convert the enrollment code (igang) to numeric in place, before reshaping.
*force sets non-numeric entries to missing. Doing this in place means no renamed
*raw copies (orig_igang<year>) are left behind in the saved file.
destring igang, replace force
gen lopenr_num=lopenr
*Reshape wide, one ob per person
reshape wide BU igang, i(lopenr_num) j(aar)

*Outcome flags, 0 until a qualifying enrollment year is found.
*StartColl is created as missing and is not filled in anywhere in this section.
gen CollEver = 0
gen StartColl = .
gen STEMCollEver = 0
gen MedCollEver = 0
gen STEMCollEver_NotMed = 0

*One pass over the years sets all flags.
*  higher : code in [600000, 900000), i.e. college or higher
*  field  : 2nd and 3rd digits of the six-digit code
*Missing codes fail the < 900000 test and therefore never set a flag.
tempvar higher field
foreach yr of local years {
	gen byte `higher' = igang`yr' >= 600000 & igang`yr' < 900000
	gen `field' = (mod(igang`yr',100000) - mod(igang`yr',1000)) / 1000
	*Ever enrolled in college or higher
	replace CollEver = 1 if `higher'
	*Ever enrolled STEM college or higher (fields 51-56, plus 63)
	replace STEMCollEver = 1 if `higher' & ((`field' >= 51 & `field' <= 56) | `field' == 63)
	*Ever enrolled Med college or higher (field 63)
	replace MedCollEver = 1 if `higher' & (`field' == 63)
	*Ever enrolled STEM college or higher, excluding medicine (fields 51-56)
	replace STEMCollEver_NotMed = 1 if `higher' & (`field' >= 51 & `field' <= 56)
	drop `higher' `field'
}
label var CollEver "Ever enroll in college or higher"
label var STEMCollEver "Ever enroll in STEMM college or higher"
label var MedCollEver "Ever enroll in medicine college or higher"
label var STEMCollEver_NotMed "Ever enroll in STEM college or higher, excluding medicine"

*Drop raw vars
drop lopenr_num BU* igang*
*Merging lower/upper secondary GPA and graduation year.
*Persons found only in the enrollment file are dropped; persons found only in
*EdOutcomes.dta are kept and have missing values for the college flags above.
merge 1:1 lopenr using EdOutcomes.dta
drop if _merge==1
drop _merge

**Save
save EdOutcomes.dta, replace
}


